%%%%%%%%%%%%%%%%%%%%%%% file template.tex %%%%%%%%%%%%%%%%%%%%%%%%%
%
% This is a general template file for the LaTeX package SVJour3
% for Springer journals.          Springer Heidelberg 2010/09/16
%
% Copy it to a new file with a new name and use it as the basis
% for your article. Delete % signs as needed.
%
% This template includes a few options for different layouts and
% content for various journals. Please consult a previous issue of
% your journal as needed.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% First comes an example EPS file -- just ignore it and
% proceed on the \documentclass line
% your LaTeX will extract the file if required
\begin{filecontents*}{example.eps}
%!PS-Adobe-3.0 EPSF-3.0
%%BoundingBox: 19 19 221 221
%%CreationDate: Mon Sep 29 1997
%%Creator: programmed by hand (JK)
%%EndComments
gsave
newpath
  20 20 moveto
  20 220 lineto
  220 220 lineto
  220 20 lineto
closepath
2 setlinewidth
gsave
  .4 setgray fill
grestore
stroke
grestore
\end{filecontents*}
%
\RequirePackage{fix-cm}
%
%\documentclass{svjour3}                     % onecolumn (standard format)
\documentclass[smallcondensed]{svjour3}     % onecolumn (ditto)
%\documentclass[smallextended]{svjour3}       % onecolumn (second format)
%\documentclass[twocolumn]{svjour3}          % twocolumn
%
\smartqed  % flush right qed marks, e.g. at end of proof
%
\usepackage{graphicx}
\usepackage{algorithm}
\usepackage{algpseudocode}
\usepackage{amsfonts}
\renewcommand{\algorithmicrequire}{ \textbf{Input:}} 
\renewcommand{\algorithmicensure}{ \textbf{Output:}} 
%
% \usepackage{mathptmx}      % use Times fonts if available on your TeX system
%
% insert here the call for the packages your document requires
%\usepackage{latexsym}
% etc.
%
% please place your own definitions here and don't use \def but
% \newcommand{}{}
%
% Insert the name of "your journal" with
% \journalname{myjournal}
%
\begin{document}

\title{Using GBDT for Accurate Emotion Recognition on Short-term Multimodal Physiological Signals%\thanks{Grants or other notes
%about the article that should go on the front page should be
%placed here. General acknowledgments should be placed at the end of the article.}
}

%\titlerunning{Short form of title}        % if too long for running head

\author{First Author         \and
        Second Author %etc.
}

%\authorrunning{Short form of author list} % if too long for running head

\institute{F. Author \at
              first address \\
              Tel.: +123-45-678910\\
              Fax: +123-45-678910\\
              \email{fauthor@example.com}           %  \\
%             \emph{Present address:} of F. Author  %  if needed
           \and
           S. Author \at
              second address
}

\date{Received: date / Accepted: date}
% The correct dates will be entered by the editor


\maketitle

\begin{abstract}
Recently, emotion recognition based on physiological signals has been paid more attention. A novel emotion recognition system based on short-term physiological signals is proposed in this paper. The required length of signals is reduced to 15 seconds to enable the system to identify emotions in real time. The physiological signals are collected from 29 subjects in four induced emotions (happiness, fear, sadness and anger) by measuring subject's electrocardiogram (ECG), galvanic skin response (GSR), electromyography (EMG) and photoplethysmography (PPG). Research shows all these physiological signals can reflect the emotion state. And we combine kernel principal component analysis (KPCA) and tree-based ensemble models to improve the performance of emotion recognition based on physiological signals. KPCA was adopted to excavate nonlinear information of the physiological dataset and remove the relevance between various features. Tree-based ensemble models including random forest and gradient boosting decision tree (GBDT) were used as pattern classifiers to deal with unrelated features. Correct classification ratios of random forest and GBDT based on KPCA were 90.04\% and 93.42\% for the recognition of four categories respectively, which prove the effectiveness of the tree-based ensemble models based on KPCA in the classification. And the result also shows that emotion recognition based on short-term physiological signals is feasible and effective.

\keywords{Emotion recognition \and Multimodal physiological signals \and Kernel PCA \and Tree-based ensemble models}
% \PACS{PACS code1 \and PACS code2 \and more}
% \subclass{MSC code1 \and MSC code2 \and more}
\end{abstract}

\section{Introduction}
\label{introduction}
Human-computer interaction (HCI) studies the ways in which humans make, or do not make, use of computational artifacts, systems and infrastructures. In doing so, much of the research in the field seeks to improve human-computer interaction by improving the usability of computer interface \cite{Grudin1992Utility}. Among them, enabling machines to recognize emotion is a very effective way, as a machine can give more appropriate feedback based on the emotion of people. So accurate emotion recognition is one of the most important issues in human-computer interaction. Emotion is a complex and conscious process; it plays an important role in human interaction and decision-making, and can be expressed either verbally through emotional vocabulary or by expressing nonverbal cues such as speech, gesture and facial expressions \cite{Liu2014EEG}. There have been many studies in the field of emotion recognition, which are based on different human expressions such as speech, gesture and facial expression. These studies have made great progress in this field, but there are a few limitations. For example, people can deliberately hide their inner emotions and some patients may not be able to speak, make expressions, or act. So physiological signals emerge as the times require. \\
    \indent Recently, physiological psychology studies show that the influence of emotion on the activity of the autonomic nervous system (ANS) is effectively reflected in the physiological signals employed. So physiological signals are one of the most robust emotional channels for recognizing inner emotion states even when a social mask is worn \cite{Kim2008Emotion}. It is sufficient to show that emotion recognition based on physiological signals is scientific, feasible and effective. As signal processing and physiological psychology of emotion are advancing, there are growing opportunities for automatic emotion recognition systems using multimodal physiological signals. So far, there have been successful research breakthroughs on multimodal emotion recognition. However, little attention has been paid to short-term physiological signals. One problem with current systems is the required length of signals. At present, at least 2--5 minutes of signal monitoring is required for a decision \cite{Picard2001Toward}. In order to realize real-time human-computer interaction, the required monitoring time must be reduced further. Another problem is classification, which is also the last and most important step in emotion recognition. Classification accuracy is a fundamental measure of the quality of an emotion recognition system. Many researchers have done a lot of work in improving the accuracy of classification in this field. It has been proven that classifier combination using a voting strategy is a remarkably effective and simple method. And the prediction results of an ensemble classifier usually depend on the performance of a single classifier and the voting strategy. \\
    \indent In this paper, a novel emotion recognition system based on the short-term multimodal physiological signals is presented. We utilized KPCA and tree-based ensemble classifier model (random forest and GBDT) using cross-validation to improve the precision of the emotion recognition system, and the results show that this model has a very good performance on our dataset. Figure \ref{process of system} illustrates the overall process of our emotion recognition system. In the figure, (a) collects and segments subject's multimodal physiological signals in four emotions; (b) preprocesses the segment data, then detects characteristic waveform of ECG and PPG; (c) extracts features from four physiological signals and fuses all features to construct the physiological dataset; (d) reduces dimension with KPCA; (e) estimates tree-based ensemble classifier model to predict the emotion category of testing dataset. \\
    \indent Our contributions are that (a) we successfully collected 29 subjects' four physiological signals (ECG, EMG, GSR, PPG) in four induced emotions including happiness, fear, sadness, anger. And we extracted features from 10--20s physiological signal segments to construct an emotional physiological dataset, which has 623 samples including 105 samples of happy, 185 samples of fear, 165 samples of sad and 168 samples of anger; (b) the performance of our model using KPCA and tree-based ensemble classifiers was proved, which achieved 93.42\% classification accuracy on four categories; (c) it was proved that emotion recognition based on short-term physiological signals is feasible and effective. \\
    \indent The following paper is arranged as follows: In Section 2, we give a brief overview of related works in the field of emotion recognition based on physiological signals. Section 3 presents KPCA and tree-based ensemble classifier model with the recognition results we achieved. Section 4 describes the acquisition experiment of physiological signals, which uses video segments as induction. Section 5 concludes our work and outlines further work.

\begin{figure}
% Use the relevant command to insert your figure file.
% For example, with the graphicx package use
  \includegraphics[width=1.0\textwidth]{picture/process_little.png}
% figure caption is below the figure
\caption{The process of emotion recognition system}
\label{process of system}       % Give a unique label
\end{figure}

\section{Related Work}
\label{related work}
As signal processing and sensor technology are advancing, we can continuously record subject's emotional-relevant physiological changes as long as he wears bio-sensors, and many researchers are paying more attention to emotion recognition based on physiological signals. Picard (2001) used pictures to induce emotions and collected four physiological signals by measuring EMG, Blood Volume Pulse (BVP), GSR and Respiration (RSP) \cite{Picard2001Toward}. L Li (2006) proposed to recognize emotion using four physiological signals (ECG, skin temperature (SKT), skin conductance (SC) and RSP) obtained from multiple subjects without much discomfort from the body surface \cite{Li2006Emotion}. JS Tsai (2009) proposed an emotion recognition system with consideration of facial expression and physiological signals including SC, finger temperature and heart rate \cite{Tsai2009Emotion}. Our research builds upon the works of KH Kim. Kim (2004) provided an emotion recognition system using short-term monitoring (50s) of physiological signals \cite{Kim2004Emotion}. The system adopted support vector machine as a pattern classifier and input signals were ECG, SKT variation and electrodermal activity (EDA). Correct-classification ratios for 50 subjects were 78.4\% and 61.8\%, for the recognition of three and four categories, respectively. We all focus mainly on classification of short-term physiological signals. Previous studies have shown that emotion recognition through physiological signals is feasible.

\subsection{Multimodal physiological signals}
\label{sec:2.1}
Acquisition of a high-quality database of physiological signals is the first step for the emotion recognition. And the selection of physiological signals directly affects the accuracy of emotion recognition. In the field of affective computing, the emotion recognition system based on physiological signals often uses the autonomic nervous system signals, such as electrocardiogram (ECG), galvanic skin response (GSR), electromyography (EMG), Respiration (RSP), skin temperature (SKT), photoplethysmography (PPG) and so on. These signals all reflect the activity of the autonomic nervous system which plays a major role in maintaining the internal equilibrium of the body. \\
    \indent In this paper, we collect four physiological signals, which are ECG, facial EMG, GSR and PPG. Here, we briefly explain the underlying rationale of correlation between emotion and the adopted signals. \\
    \indent  Electrocardiogram (ECG) and photoplethysmography (PPG) are among the most important physiological signals and have been subject to a high degree of attention in the emotion recognition field. They are the digital representation of heart activity; their features represent activities of the heart. And the heart rate is dependent on the activity level of the autonomic nervous system. Heart rate can reflect a person's emotional state to a certain degree. The increase in heart rate under fear stimuli is a typical example of this phenomenon. \\
    \indent Facial electromyography (EMG) is a signal that can easily be measured from the face surface. It is caused by the activity of facial muscle contraction and reflects the state of the nerves and muscles. It can reflect the changes of emotion from facial expressions. \\
    \indent Galvanic skin response (GSR) is another signal that can easily be measured from the body surface and represents the activity of the autonomic nervous system. It characterises changes in the electrical properties of the skin due to the activity of sweat glands and is physically interpreted as conductance. Sweat glands distributed on the skin receive input from the sympathetic nervous system only, and thus this is a good indicator of arousal level due to external sensory and cognitive stimuli. It has frequently been adopted for polygraphic lie detectors \cite{Li2002A}.
    
\subsection{Emotion stimuli}
\label{sec:2.2}
The subjects of our experiment are not professional actors, they are not good at expressing particular emotions. To obtain effective emotional physiological signals, it is necessary to first induce the targeted emotions of the subjects. It is well known that emotion stimuli can have a great effect on the activity of the autonomic nervous system \cite{Andreassi2000Psychophysiology}. The increase in heart rate and blood pressure and the enlargement of pupil diameter under fear stimuli are typical examples of this phenomenon. Therefore, the selection of inducement is the primary factor of emotion recognition research. At present, the widely used induction techniques are divided into three categories. The first way is to watch pictures \cite{Alzoubi2011Affective,Haag2012Emotion}, videos \cite{Bailenson2008Real}, or listen to music \cite{Kim2008Emotion}. The public international affective picture system (IAPS) \cite{Bradley1988International} is often used as a picture inducement. And the affective electroencephalogram (EEG) benchmark database DEAP collects subject's electroencephalogram and peripheral physiological signals by video simulation \cite{Koelstra2012DEAP}. The second way is to imitate reality, including sensory stimulation (such as darkness, noise stimulation), driving \cite{Healey2005Detecting}, human-machine interaction \cite{Mohammad2010Using}, playing computer games \cite{Mandryk2006Using}, and doing long and cumbersome cognitive tasks \cite{Kim2004Emotion}. The third way is self-imagination and memory, including imagining some kind of emotional picture or recollecting previous experiences. The induction effects of these methods have been verified. Among them, the methods of using pictures, audio and video are more common and convenient according to the investigation of related work. But we concluded that visual stimulation using still images and auditory stimulation using music alone were not sufficient for effective emotion induction.
So in this paper, we adopt video segments as stimulation, which is a multimodal (audio, visual and cognitive) approach to evoke specific targeted emotional states.

\subsection{Emotion annotation}
\label{sec:2.3}
The annotation of physiological signals is different from the emotion annotation based on image, audio and video materials. It's not a direct, obvious process. The labeling of physiological signals can refer to the emotional type contained in the inducement. However, different subjects may have different cognition and interpretations of the inducing material. So it's unavoidable that the induced emotions of subjects are different from each other. This will cause the number of samples for each emotion category to differ. \\
    \indent At present, most of the research work is to combine the following three annotation methods to make emotional annotation to physiological signals. The first is to label physiological signals based on audio and video information recorded synchronously. For example, the emotional types of the subjects can be labeled on the basis of facial expressions. But it is difficult to distinguish the type of expression, when the emotional intensity is weak and the facial expression is not obvious. And because of the differences in character, identity and cultural background, especially in eastern and western cultures, the emotional intensity of different subjects can not be determined by expression. So some scholars just regard this method as a means of assistance. The second is with the help of professionals with psychological knowledge. The third approach is to regard the subjective emotional experience report of the subjects as the annotation of emotions. In the experiment, the subjects were asked to record the current emotional type or pleasure, arousal state. There are discrete forms of records, such as low, median, and high, or divided into several levels (such as 0--9, where 0 represents low and 9 represents high). There are also continuous forms using continuous values. The research shows that it is more effective to label the physiological signals according to subjective experience of the subjects.

\section{Algorithm}
\label{algorithm}
To improve the classification accuracy of emotion recognition system, we proposed a combined model, which adopts tree-based ensemble classifiers based on KPCA. As we know, features extracted from the same signals are often correlated and this affects classification performance. Many researchers adopt feature reduction to solve this problem, but they often ignore the nonlinear relationship between features. In this paper, KPCA was used to mine nonlinear information of features and remove the relevance between various features. After the KPCA is processed, we can obtain a new dataset with independent attributes. For classification, we adopted ensemble classifiers based on decision trees, including random forest and GBDT. Because decision trees can handle unrelated features and the ensemble method can significantly improve the classification accuracy. At last, we compared the classification results of random forest and GBDT. Figure \ref{algorithm of model} shows the procedure of the algorithm.
\begin{figure}
% Use the relevant command to insert your figure file.
% For example, with the graphicx package use
  \includegraphics[width=1.0\textwidth]{picture/algorithm.pdf}
% figure caption is below the figure
\caption{Block diagram of KPCA and tree-based ensemble classifier model}
\label{algorithm of model}       % Give a unique label
\end{figure}
\subsection{Kernel PCA}
\label{sec:3.1}
In the field of multivariate statistics, kernel principal component analysis (KPCA) is a nonlinear extension of PCA using techniques of kernel methods. It has many applications including denoising, compression and structured prediction. And KPCA as a nonlinear feature extractor has proven powerful as a preprocessing step for classification algorithms \cite{Mika1999Kernel}. \\
    \indent To understand the utility of KPCA \cite{Hoffmann2007Kernel,Sch1998Nonlinear}, we assume a distribution consisting of n data points $x_{i}\in\mathbb{R}^{d}$. Before performing a PCA, these data points are mapped into a higher-dimensional feature space $\mathcal{F}$ using a kernel function $k \left ( x, y \right )$,
    \begin{equation}
    \label{eq:1}
    x_{i}\rightarrow\Phi\left ( x_{i} \right )
    \end{equation}
    \indent In this space, standard PCA is performed. In KPCA, an eigenvector $V$ of the covariance matrix in $\mathcal{F}$ is a linear combination of points $\Phi\left ( x_{i} \right )$,
    \begin{equation}
    \label{eq:2}
    V= \begin{matrix} \sum_{i=1}^{n} \alpha _{i} \widetilde{\Phi} \left ( x_{i} \right )\end{matrix}
    \end{equation}
with
    \begin{equation}
    \label{eq:3}
    \widetilde{\Phi} \left ( x_{i} \right ) = \Phi \left ( x_{i} \right ) - \frac{1}{n} \begin{matrix} \sum_{r=1}^n \Phi \left ( x_{r} \right ).\end{matrix}
    \end{equation}
The vectors $\widetilde{\Phi} \left ( x_{i} \right )$ are chosen such that they are centered around the origin in $\mathcal{F}$. The $\alpha_{i}$ are the components of a vector $\alpha$. It turns out that this vector is an eigenvector of the matrix $\widetilde{K_{ij}} = \left ( \widetilde{\Phi} \left ( x_{i} \right ) \cdot \widetilde{\Phi} \left ( x_{j} \right )\right )$. The length of $\alpha$ is chosen such that the principal components $V$ have unit length: 
    \begin{equation}
    \label{eq:4}
    \left \| V \right \| = 1 \Leftrightarrow \left \| \alpha \right \|^{2} = \frac{1}{\lambda},
    \end{equation}
with $\lambda$ being the eigenvalue of $\widetilde{K}$ corresponding to $\alpha$. To compute $\widetilde{K}$, we substitute $\widetilde{\Phi}$ according to equation \ref{eq:3}. This substitution gives $\widetilde{K}_{ij}$ as a function of the kernel matrix $K_{ij} = k \left ( x_{i}, x_{j} \right )$:
    \begin{equation}
    \label{eq:5}
    \widetilde{K}_{ij} = K_{ij} -\frac{1}{n} \begin{matrix} \sum_{r=1}^{n} K_{rj} \end{matrix} + \frac{1}{n^2} \begin{matrix} \sum_{r,s = 1}^{n} K_{rs} \end{matrix}.
    \end{equation}
In this paper, we use the frequently used (Gaussian) radial basis kernel function $k\left ( x, y \right ) = e^{\frac{-\left \| x - y \right \|^{2}}{2 \sigma^{2}}}$ to map our 136-dimensional dataset into an infinite-dimensional space, making the dataset easier to distinguish, and then standard PCA is performed to reduce feature dimension.

\subsection{Tree-based ensemble model}
\label{sec:3.2}
As the ensemble learning model can not only ensure high accuracy but also prevent overfitting, we adopted the two most popular ensemble models as the classifier of our emotion system. These two ensemble models are random forest and GBDT, which have shown very good performance on the classification and regression tasks in many data mining competitions organized by KDD Cup and Kaggle. These two ensemble classifiers are all constructed by multiple decision trees, but use a different ensemble learning framework. \\
\indent Random forest uses the bagging framework. The training dataset of each base model is generated from the original dataset using random sampling with replacement. The selection of features for each base model is also the same. The final prediction result for the testing dataset is made by taking the majority vote on the results of all base models. The algorithm is detailed in Algorithm \ref{alg:1}. \\
\indent GBDT uses the boosting framework. The training dataset of each base model is also generated from the original dataset with a coordinated strategy. First, all samples are given the same initial weight, which represents the probability of selection; then, in each iteration, the weights of the wrongly classified samples are increased and the weights of the correctly classified samples are reduced, based on the classification results of the last iteration. And in each iteration, the base models are updated in the direction of the reduced pseudo-residuals. The final prediction result for the testing dataset is a linear synthesis of the results of all base models. The algorithm is shown in Algorithm \ref{alg:2}. \\

\begin{algorithm}
\caption{Random forest}
\label{alg:1}
\begin{algorithmic}
\Require \\
1. Training dataset $S = \left \{ \left ( x_{i}, y_{i} \right ) \right \}_{i=1}^{n}, \left ( X, Y \right ) \in \mathbb{R}^{d} \times R$ \\
2. Testing Sample $x_{i} \in \mathbb{R}^{d}$
\For {$m = 1$ to $N$} \\
	\quad (1) Random sampling with replacement is used for the original training dataset $S$ to generate the training dataset $S_{i}$ \\
	\quad (2) Using $S_{i}$ to generate a tree  without pruning: \\
	\qquad a. Randomly selecting $M$ features from the $d$ features \\
	\qquad b. Select the optimal features from $M$ features according to the gini index on each node \\
	\qquad c. Divide until the tree grows to the maximum
\EndFor
\Ensure \\
1. The set of trees $\left \{ h_{i} \right \}_{i=1}^{N}$. \\
2. For testing sample $x_{i}$, the decision tree $h_{i}$ outputs $h_{i} \left ( x_{i} \right )$ \\
3. \Return classification: $f\left ( x_{i} \right ) = \textrm{majority vote}\left \{ h_{i}\left ( x_{i} \right ) \right \}_{i=1}^{N}$
\end{algorithmic} 
\end{algorithm}

\begin{algorithm}
\caption{Gradient boosting decision tree}
\label{alg:2}
\begin{algorithmic}
\Require \\
1. Training set $S = \left \{ \left ( x_{i}, y_{i} \right ) \right \}_{i=1}^{n}$, a prediction model $F\left ( x \right )$, a differentiable loss function $L\left ( y, F\left ( x \right ) \right )$, number of iterations $N$, learning rate $\nu$ \\
2. Testing Sample $x_{i} \in \mathbb{R}^{d}$ \\
3. Initialize model with a constant value: \\
\quad $F_{0}\left ( x \right ) = \arg \min \limits_{\gamma} \begin{matrix} \sum_{i=1}^{n} L\left ( y_{i}, \gamma \right )\end{matrix}$
\For {$m = 1$ to $N$} \\
	\quad (1) Compute pseudo-residuals: \\
	 \  \  \qquad $r_{im} = -\left [ \frac{\partial L\left ( y_{i}, F\left ( x_{i} \right ) \right )}{\partial F\left ( x_{i} \right )} \right ]_{F\left ( x \right ) = F_{m-1} \left (x  \right )}$ for $i = 1, \dots, n$ \\
	\quad (2) Fit a base learner (decision tree) $h_{m}\left ( x \right )$ to pseudo-residuals, i.e. train it using the training set $\left \{ \left ( x_{i},r_{im} \right ) \right \}_{i=1}^{n}$ generated from the original training dataset $S$ using boosting sampling \\
	\quad (3) Compute multiplier $\gamma_{m}$ by solving the following one-dimensional optimization problem: \\
	 \  \  \qquad $\gamma_{m} = \arg \min_{\gamma} \begin{matrix} \sum_{i=1}^{n} L\left ( y_{i}, F_{m-1}\left ( x_{i} \right ) + \gamma h_{m}\left ( x_{i} \right ) \right )\end{matrix}$ \\
	\quad (4) Update the model: \\
	 \  \  \qquad $F_{m}\left ( x \right ) = F_{m-1}\left ( x \right ) + \nu  \gamma_{m} h_{m}\left ( x \right )$, $0 < \nu < 1$
\EndFor
\Ensure \\
1. The set of trees $\left \{ h_{i} \right \}_{i=1}^{N}$. \\
2. For testing sample $x_{i}$, the decision tree $h_{i}$ outputs $h_{i} \left ( x_{i} \right )$ \\
3. \Return $F_{N} \left ( x_{i} \right)$
\end{algorithmic} 
\end{algorithm}

\section{Experiment}
\label{experiment}

\subsection{Collection of multimodal physiological signals}
\label{sec:4.1}
\paragraph{Subjects} 
The group of subjects included 29 students, 15 male and 14 female, who come from the Software College, Tianjin University, P.~R. China. They are aged from 18 to 30 years (mean = 22.97, standard deviation (std) = 2.83). They are healthy both physically and psychologically and they can express emotions normally.
\paragraph{Emotion Induction}
For the stimuli video, we choose a movie clip of \emph{Prick Silk Man} for happiness, a movie clip of \emph{The Grudge} for fear, a movie clip of \emph{The Tangshan Earthquake} for sadness, a movie clip of \emph{The Silenced} for anger and a one minute static picture as a transition. These movie clips are very representative. Scientific research indicates that a person's concentration of energy is 20--30 minutes. Considering the subjects' patience, each movie clip plays for about 4 minutes, in the happiness, fear, sadness and anger sequence. And in the interval between each two induction clips, a one minute static picture is played to help the subject calm down. The whole stimuli video lasts about 20 minutes.
\paragraph{Experimental scene and instruments}
The emotion induction experiment was arranged in a soundproof room. The physiological signals, including ECG, EMG, GSR and PPG, were acquired using the BIOPAC MP150 system. The sampling rate was fixed at 200 Hz for all channels. According to different physiological signals, we set up corresponding high-pass filter, low-pass filter and gain. In addition, we also used two computers. One computer was used to record signals from the BIOPAC MP150 system, and another computer with a camera was used to play emotional stimulation and record the expression of the subjects synchronously. Figure \ref{experimental platform} shows the experimental platform.

\begin{figure}
  \includegraphics[width=1.0\textwidth]{picture/platform.pdf}
\caption{The experimental platform}
\label{experimental platform} 
\end{figure}

\begin{figure}
  \includegraphics[width=1.0\textwidth]{picture/experiment.jpeg}
\caption{A subject in emotion induction experiment}
\label{experiment} 
\end{figure}

\paragraph{Experiment procedure}
At the beginning of experiment, we explain the experiment process for subjects and help subjects wear physiological signal sensors. Subsequently, about 20 minutes emotional stimulus was applied. The subjects were requested to be as relaxed as possible during this period.  Figure \ref{experiment} shows a subject in the emotion induction experiment. After the experiment, participants were asked to fill out the self emotional experience feedback form. In the subsequent steps, the report of the subjects' self emotional experience and the experimental record of the expression video will be used as the basis for the annotation of the emotional types of physiological signals.

\subsection{Preprocessing}
\label{sec:4.2}
Physiologically, the intensity of physiological signal is relatively weak, it is easily interfered by noise. Besides, the electromagnetic phenomenon of the experimental instrument, the power frequency interference and the action of the subjects will all interfere with the physiological signals. Therefore, the preprocessing of physiological signals is an indispensable step in the experiment. \\
    \indent Firstly, adaptive FIR filtering was used to eliminate interference between channels. Adaptive filtering is a signal processing technique that processes two different signals in relation to one another. Then, taking into account the need for real time, we intercept a signal segment of about 15s for each sample according to the subject's self-assessment report and synchronized expression video. The subsequent processing is all based on these signal fragments. \\
    \indent For ECG and PPG signals, the moving average method was used for smoothing, and wavelet decomposition was used to remove baseline drift and detect the characteristic waveforms (R waveform in ECG and P waveform in PPG). Figure \ref{ECG and PPG processing} shows the preprocessing of ECG and PPG signals. For the EMG signal, a low-pass Butterworth filter with a 0.4 Hz cutoff frequency was used to remove noise. And for the GSR signal, a low-pass Butterworth filter with a 0.3 Hz cutoff frequency was used for smoothing.

\begin{figure}
  \includegraphics[width=1.0\textwidth]{picture/processing.png}
\caption{Preprocessing of ECG and PPG}
\label{ECG and PPG processing} 
\end{figure}

\subsection{Feature extraction}
\label{sec:4.3}
Augsburg Biosignal Toolbox (AuBT) \cite{WagnerAugsburg} is a toolbox for analyzing physiological signals for emotion recognition. After preprocessing, we use AuBT to extract emotion-relevant features from physiological signal segments. A total of 136 features were extracted, including 80 ECG features, 20 EMG features, 17 GSR features and 19 PPG features. The self assessment report and expression video of the subjects were used for induced emotion annotation. Then, the physiological dataset was constructed based on these features and emotion annotation. The numbers of samples for happiness, fear, sadness and anger are 105, 185, 168 and 165 respectively.

\paragraph{ECG feature extraction}
 A typical electrocardiogram is composed of 5 basic waveforms: the P wave, Q wave, R wave, S wave and T wave. The features of the ECG include the time interval and amplitude characteristics of each wave, the heart rate and the heart rate variability (HRV). Heart rate is generally used to distinguish positive and negative emotions. Heart rate variability refers to the fluctuation of the time intervals between consecutive heartbeats, reflecting the psychological stress of adults. The initial features selected include the 13 underlying characteristics of the adjacent heartbeat intervals, the heart rate variability, and the high-level features extracted from the 13 underlying features. Table \ref{ECGfeature} presents the details of the ECG features. pNN50 is a common indicator of HRV, which represents the number of pairs of adjacent R-R intervals differing by more than 50 ms divided by the total number of R-R intervals.
% For tables use
\begin{table}
% table caption is above the table
\caption{Features extracted from ECG signal}
\label{ECGfeature}       % Give a unique label
% For LaTeX tables use
\begin{tabular}{lll}
\hline\noalign{\smallskip}
Underlying features & Statistical features  \\
\noalign{\smallskip}\hline\noalign{\smallskip}
R & Mean, Median, Std, Min, Max, Range \\
P & Mean, Median, Std, Min, Max, Range \\
Q & Mean, Median, Std, Min, Max, Range \\
S & Mean, Median, Std, Min, Max, Range \\
T & Mean, Median, Std, Min, Max, Range \\
PQ & Mean, Median, Std, Min, Max, Range \\
QS & Mean, Median, Std, Min, Max, Range \\
ST & Mean, Median, Std, Min, Max, Range \\
P amplitude & Mean, Median, Std, Min, Max, Range \\
R amplitude & Mean, Median, Std, Min, Max, Range \\
S amplitude & Mean, Median, Std, Min, Max, Range \\
HRV & Mean, Median, Std, Min, Max, Range, pNN50, Frequency spectrum \\
Hrv distribution & Mean, Median, Std, Min, Max, Range, Triind \\
\noalign{\smallskip}\hline
\end{tabular}
\end{table}

\paragraph{GSR feature extraction}
21 time-domain and frequency-domain features are extracted from the GSR signal. The features of the filtered time-domain signal are extracted from three underlying features: the raw GSR signal, the first-order differential (GSR-1Diff) and the second-order differential (GSR-2Diff) of GSR. Each underlying feature consists of 7 statistical features, namely mean, median, std, min, max, minRatio and maxRatio. Table \ref{GSRfeature} presents the details of the GSR features.
\begin{table}
% table caption is above the table
\caption{Features extracted from GSR signal}
\label{GSRfeature}       % Give a unique label
% For LaTeX tables use
\begin{tabular}{lll}
\hline\noalign{\smallskip}
Underlying features & Statistical features  \\
\noalign{\smallskip}\hline\noalign{\smallskip}
GSR & Mean, Median, Std, Min, Max, MinRatio, MaxRatio \\
GSR-1Diff & Mean, Median, Std, Min, Max, MinRatio, MaxRatio \\
GSR-2Diff & Mean, Median, Std, Min, Max, MinRatio, MaxRatio \\
\noalign{\smallskip}\hline
\end{tabular}
\end{table}

\paragraph{EMG feature extraction}
The feature extraction of the EMG signal is the same as that of the GSR signal, as shown in Table \ref{EMGfeature}.
\begin{table}
% table caption is above the table
\caption{Features extracted from EMG signal}
\label{EMGfeature}       % Give a unique label
% For LaTeX tables use
\begin{tabular}{lll}
\hline\noalign{\smallskip}
Underlying features & Statistical features  \\
\noalign{\smallskip}\hline\noalign{\smallskip}
EMG & Mean, Median, Std, Min, Max, MinRatio, MaxRatio \\
EMG-1Diff & Mean, Median, Std, Min, Max, MinRatio, MaxRatio \\
EMG-2Diff & Mean, Median, Std, Min, Max, MinRatio, MaxRatio \\
\noalign{\smallskip}\hline
\end{tabular}
\end{table}

\paragraph{PPG feature extraction}
The PPG signal is mainly composed of P waves, so the features of the PPG signal mainly include the statistical features of the P wave and the HRV features extracted from the P wave. These features are shown in Table \ref{PPGfeature}.

\begin{table}
% table caption is above the table
\caption{Features extracted from PPG signal}
\label{PPGfeature}       % Give a unique label
% For LaTeX tables use
\begin{tabular}{lll}
\hline\noalign{\smallskip}
Underlying features & Statistical features  \\
\noalign{\smallskip}\hline\noalign{\smallskip}
P-PPG & Mean, Median, Std, Min, Max, Range \\
HRV-PPG & Mean, Median, Std, Min, Max, Range, Frequency spectrum \\
\noalign{\smallskip}\hline
\end{tabular}
\end{table}

\subsection{Results Analysis}
\label{sec:4.4}
In this section, we compare our classification results for both random forest and GBDT using 10-fold cross validation. First, we evaluate our random forest model and some of the various configurations of decision trees and parameters we experimented with, as detailed in Table \ref{RFclassification}. Table \ref{RFclassification} illustrates the various random forest models used for emotion classification on the original dataset and on the new dataset after KPCA processing. Then we evaluate our GBDT model in the same way, as detailed in Table \ref{GBDTclassification}. As is evident from these two tables, the number of estimators, the max depth and the max features all have an effect on the classification accuracy, and the max depth and max features are more important to the classification accuracy than the number of estimators. For GBDT, the learning rate also has a great impact on the classification accuracy. From Tables \ref{RFclassification} and \ref{GBDTclassification} we can easily find that KPCA greatly improves the prediction accuracy of our models. \\
\indent To reach this final configuration we performed multiple experiments varying the number of estimators and other parameters. Our best random forest model, which has 200 estimators with a max depth of 4 and max features of 30, achieves 90.04\% accuracy. Our best gradient boosting decision tree model, with the same decision-tree configuration and a learning rate of 0.1, achieves 93.42\% accuracy. As can be clearly seen from Tables \ref{RFclassification} and \ref{GBDTclassification}, both the random forest and the GBDT models show good classification performance on our dataset, which proves the effectiveness of KPCA-based tree ensemble models in this classification task. Figure \ref{comparison} presents a comparison of the classification accuracy of the two models based on KPCA, where the abscissa represents the parameters of the models. For example, (50, 2, 10) represents 50 estimators, a max depth of 2 and max features of 10; the learning rate of the gradient boosting decision tree model is 0.1. It is obvious from Figure \ref{comparison} that the gradient boosting decision tree model generally provides better accuracy than the random forest model. We think there are two main reasons. The first reason is the sampling method for each decision tree: the sampling methods of GBDT and random forest are boosting sampling and random sampling respectively, and the number of samples for each emotion is different in our physiological dataset.
In theory, boosting sampling is better than random sampling on this unbalanced dataset. The second reason is the model updating method: the decision trees in a random forest are built randomly and independently of each other, while each decision tree in GBDT is built in the direction that decreases the residual. For these two reasons, we think GBDT has better classification performance on our physiological dataset.

\begin{table}
\caption{Comparison between different random forest models for classification based on KPCA}
\label{RFclassification}       % Give a unique label
\begin{tabular}{lllll}
\hline\noalign{\smallskip}
Estimators & Max depth & Max features & Accuracy without KPCA & Accuracy with KPCA \\
\noalign{\smallskip}\hline\noalign{\smallskip}
50 & 2 & 10 & 47.66\% & 75.77\% \\
50 & 3 & 10 & 51.36\% & 81.56\% \\
50 & 4 & 10 & 54.62\% & 83.30\% \\
50 & 4 & 20 & 57.78\% & 88.46\% \\
50 & 4 & 30 & 55.72\% & 88.92\% \\
100 & 4 & 30 & 58.25\% & 89.73\% \\
200 & 4 & 30 & 55.37\% & 90.04\% \\
\noalign{\smallskip}\hline
\end{tabular}
\end{table}

\begin{table}
\caption{Comparison between different GBDT models for classification}
\label{GBDTclassification}       % Give a unique label
\begin{tabular}{llllll}
\hline\noalign{\smallskip}
Learning rate & Estimators & Max depth & Max features & Accuracy without KPCA & Accuracy with KPCA \\
\noalign{\smallskip}\hline\noalign{\smallskip}
0.1 & 50 & 2 & 10 & 61.47\% & 86.05\% \\
0.1 & 50 & 3 & 10 & 63.64\% & 88.75\% \\
0.1 & 50 & 4 & 10 & 64.36\% & 88.25\% \\
0.1 & 50 & 4 & 20 & 64.83\% & 90.69\% \\
0.1 & 50 & 4 & 30 & 65.48\% & 91.50\% \\
0.1 & 100 & 4 & 30 & 66.26\% & 92.46\% \\
0.1 & 200 & 4 & 30 & 67.40\% & 93.42\% \\
0.01 & 200 & 4 & 30 & 63.23\% & 92.46\% \\
0.001 & 200 & 4 & 30 & 56.50\% & 89.57\% \\
\noalign{\smallskip}\hline
\end{tabular}
\end{table}

\begin{figure}
  \includegraphics[width=1.0\textwidth]{picture/comparison2.png}
\caption{Comparison of classification accuracy of random forest and GBDT based on KPCA}
\label{comparison} 
\end{figure}

\section{Conclusion}
\label{conclusion}
In this paper, we collect multimodal physiological signals (ECG, GSR, EMG and PPG) from 29 subjects in four induced emotions (happiness, fear, sadness and anger), and propose a novel emotion recognition system based on the processing of these four physiological signals. This system shows a very high recognition rate on our physiological dataset. Our system reduces the required length of the signals and proves the effectiveness of KPCA-based tree ensemble models in the classification. KPCA was used to excavate the nonlinear information of the physiological dataset and remove the correlation between the various features. Random forest and GBDT were utilized as pattern classifiers to deal with uncorrelated features and achieved high accuracy rates. The correct classification ratios of random forest and GBDT based on KPCA were 90.04\% and 93.42\%, respectively. As physiological signals are complex, more effective emotional features should be extracted from multimodal physiological signals in future emotion recognition systems.

%\begin{acknowledgements}
%If you'd like to thank anyone, place your comments here
%and remove the percent signs.
%\end{acknowledgements}

% BibTeX users please use one of
%\bibliographystyle{spbasic}      % basic style, author-year citations
\bibliographystyle{spmpsci}      % mathematics and physical sciences
%\bibliographystyle{spphys}       % APS-like style for physics
\bibliography{references}   % name your BibTeX data base

% Non-BibTeX users please use
%\begin{thebibliography}{}
%
% and use \bibitem to create references. Consult the Instructions
% for authors for reference list style.
%
%\bibitem{RefJ}
% Format for Journal Reference
%Author, Article title, Journal, Volume, page numbers (year)
% Format for books
%\bibitem{RefB}
%Author, Book title, page numbers. Publisher, place (year)
% etc
%\end{thebibliography}

\end{document}
% end of file template.tex

